# Load the life-expectancy dataset from its serialized RDS file
# (the path is resolved relative to the current working directory).
data <- readRDS(file = "life_expectancy_data.RDS")
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# stat.test <- data %>% filter(continent %in% c('Africa', 'Americas')) %>%t_test(`Life expectancy` ~ continent) %>% add_xy_position(x = "continent")
# stat.test
# data %>% filter(continent %in% c('Africa', 'Americas')) %>% ggboxplot(x = "continent", y = "Life expectancy", ylab = "Life expectancy", xlab = "continent", add = "jitter") + labs(subtitle = get_test_label(stat.test, detailed = TRUE)) + stat_pvalue_manual(stat.test, tip.length = 0)
# Numeric-only working table: every numeric column of `data` except Year.
new_data <- data %>%
  select(where(is.numeric) & !Year)

# Pairwise correlation matrix of the retained columns (cor() defaults),
# printed for inspection.
data_cor <- cor(new_data)
data_cor
## Life expectancy Unemployment
## Life expectancy 1.0000000 -0.122453828
## Unemployment -0.1224538 1.000000000
## Infant Mortality -0.8309072 0.103775270
## GDP 0.1688143 -0.111437568
## GNI 0.1786383 -0.109486971
## Clean fuels and cooking technologies 0.7637583 0.063975082
## Per Capita 0.6034817 -0.183778304
## Mortality caused by road traffic injury -0.6518097 0.173386776
## Tuberculosis Incidence -0.5831370 0.121480015
## DPT Immunization 0.5044753 -0.147098313
## HepB3 Immunization 0.4023880 -0.102304627
## Measles Immunization 0.5323483 -0.166019661
## Hospital beds 0.4849944 -0.147215966
## Basic sanitation services 0.8515922 0.029209789
## Tuberculosis treatment -0.3214166 -0.051270213
## Urban population 0.5829745 0.085895193
## Rural population -0.5829745 -0.085895193
## Non-communicable Mortality -0.6338148 0.131242386
## Sucide Rate 0.1593575 0.006558983
## Infant Mortality GDP
## Life expectancy -0.83090718 0.16881430
## Unemployment 0.10377527 -0.11143757
## Infant Mortality 1.00000000 -0.16907590
## GDP -0.16907590 1.00000000
## GNI -0.16096299 0.99435959
## Clean fuels and cooking technologies -0.77567408 0.13751753
## Per Capita -0.31858469 0.17389277
## Mortality caused by road traffic injury 0.65050858 -0.11544108
## Tuberculosis Incidence 0.56359507 -0.08695088
## DPT Immunization -0.59029923 0.10813790
## HepB3 Immunization -0.52710787 0.08719372
## Measles Immunization -0.58504641 0.10139187
## Hospital beds -0.52085961 0.13217297
## Basic sanitation services -0.77056506 0.14600318
## Tuberculosis treatment 0.27989549 -0.02495462
## Urban population -0.47460671 0.15210771
## Rural population 0.47460671 -0.15210771
## Non-communicable Mortality 0.66661171 -0.19176139
## Sucide Rate 0.05980403 0.11409037
## GNI
## Life expectancy 0.17863826
## Unemployment -0.10948697
## Infant Mortality -0.16096299
## GDP 0.99435959
## GNI 1.00000000
## Clean fuels and cooking technologies 0.13470096
## Per Capita 0.18266349
## Mortality caused by road traffic injury -0.11353833
## Tuberculosis Incidence -0.08693997
## DPT Immunization 0.10537313
## HepB3 Immunization 0.08210454
## Measles Immunization 0.09835173
## Hospital beds 0.13351160
## Basic sanitation services 0.15141620
## Tuberculosis treatment -0.02115128
## Urban population 0.16783616
## Rural population -0.16783616
## Non-communicable Mortality -0.18266265
## Sucide Rate 0.13051116
## Clean fuels and cooking technologies
## Life expectancy 0.76375825
## Unemployment 0.06397508
## Infant Mortality -0.77567408
## GDP 0.13751753
## GNI 0.13470096
## Clean fuels and cooking technologies 1.00000000
## Per Capita 0.38753491
## Mortality caused by road traffic injury -0.59553317
## Tuberculosis Incidence -0.54553537
## DPT Immunization 0.44831815
## HepB3 Immunization 0.38243439
## Measles Immunization 0.49991332
## Hospital beds 0.43564106
## Basic sanitation services 0.84316638
## Tuberculosis treatment -0.31357924
## Urban population 0.59435666
## Rural population -0.59435666
## Non-communicable Mortality -0.65526759
## Sucide Rate 0.00976739
## Per Capita
## Life expectancy 0.60348166
## Unemployment -0.18377830
## Infant Mortality -0.31858469
## GDP 0.17389277
## GNI 0.18266349
## Clean fuels and cooking technologies 0.38753491
## Per Capita 1.00000000
## Mortality caused by road traffic injury -0.41639961
## Tuberculosis Incidence -0.30760893
## DPT Immunization 0.21421912
## HepB3 Immunization 0.09303434
## Measles Immunization 0.21571852
## Hospital beds 0.24966424
## Basic sanitation services 0.45409579
## Tuberculosis treatment -0.32644726
## Urban population 0.42880232
## Rural population -0.42880232
## Non-communicable Mortality -0.35531918
## Sucide Rate 0.32281997
## Mortality caused by road traffic injury
## Life expectancy -0.6518097
## Unemployment 0.1733868
## Infant Mortality 0.6505086
## GDP -0.1154411
## GNI -0.1135383
## Clean fuels and cooking technologies -0.5955332
## Per Capita -0.4163996
## Mortality caused by road traffic injury 1.0000000
## Tuberculosis Incidence 0.4123296
## DPT Immunization -0.3400658
## HepB3 Immunization -0.2628041
## Measles Immunization -0.3107449
## Hospital beds -0.4909601
## Basic sanitation services -0.6320693
## Tuberculosis treatment 0.3130249
## Urban population -0.3718674
## Rural population 0.3718674
## Non-communicable Mortality 0.4071462
## Sucide Rate -0.1102582
## Tuberculosis Incidence DPT Immunization
## Life expectancy -0.58313705 0.50447529
## Unemployment 0.12148001 -0.14709831
## Infant Mortality 0.56359507 -0.59029923
## GDP -0.08695088 0.10813790
## GNI -0.08693997 0.10537313
## Clean fuels and cooking technologies -0.54553537 0.44831815
## Per Capita -0.30760893 0.21421912
## Mortality caused by road traffic injury 0.41232959 -0.34006575
## Tuberculosis Incidence 1.00000000 -0.37169763
## DPT Immunization -0.37169763 1.00000000
## HepB3 Immunization -0.31215616 0.94776877
## Measles Immunization -0.37364785 0.88078924
## Hospital beds -0.19543396 0.32366629
## Basic sanitation services -0.55532307 0.45942955
## Tuberculosis treatment 0.23672979 -0.13993470
## Urban population -0.33622933 0.22057595
## Rural population 0.33622933 -0.22057595
## Non-communicable Mortality 0.48089925 -0.38159200
## Sucide Rate 0.09858654 0.05567581
## HepB3 Immunization Measles Immunization
## Life expectancy 0.40238797 0.53234834
## Unemployment -0.10230463 -0.16601966
## Infant Mortality -0.52710787 -0.58504641
## GDP 0.08719372 0.10139187
## GNI 0.08210454 0.09835173
## Clean fuels and cooking technologies 0.38243439 0.49991332
## Per Capita 0.09303434 0.21571852
## Mortality caused by road traffic injury -0.26280410 -0.31074490
## Tuberculosis Incidence -0.31215616 -0.37364785
## DPT Immunization 0.94776877 0.88078924
## HepB3 Immunization 1.00000000 0.86161432
## Measles Immunization 0.86161432 1.00000000
## Hospital beds 0.27225503 0.33526203
## Basic sanitation services 0.38112985 0.50904494
## Tuberculosis treatment -0.09250053 -0.14092951
## Urban population 0.13692089 0.24604275
## Rural population -0.13692089 -0.24604275
## Non-communicable Mortality -0.31401541 -0.38626279
## Sucide Rate -0.01978305 0.02560727
## Hospital beds Basic sanitation services
## Life expectancy 0.4849944 0.85159219
## Unemployment -0.1472160 0.02920979
## Infant Mortality -0.5208596 -0.77056506
## GDP 0.1321730 0.14600318
## GNI 0.1335116 0.15141620
## Clean fuels and cooking technologies 0.4356411 0.84316638
## Per Capita 0.2496642 0.45409579
## Mortality caused by road traffic injury -0.4909601 -0.63206935
## Tuberculosis Incidence -0.1954340 -0.55532307
## DPT Immunization 0.3236663 0.45942955
## HepB3 Immunization 0.2722550 0.38112985
## Measles Immunization 0.3352620 0.50904494
## Hospital beds 1.0000000 0.47445249
## Basic sanitation services 0.4744525 1.00000000
## Tuberculosis treatment -0.1947393 -0.30065649
## Urban population 0.2740715 0.55069603
## Rural population -0.2740715 -0.55069603
## Non-communicable Mortality -0.3562093 -0.52254411
## Sucide Rate 0.2665261 0.15953741
## Tuberculosis treatment Urban population
## Life expectancy -0.32141658 0.58297452
## Unemployment -0.05127021 0.08589519
## Infant Mortality 0.27989549 -0.47460671
## GDP -0.02495462 0.15210771
## GNI -0.02115128 0.16783616
## Clean fuels and cooking technologies -0.31357924 0.59435666
## Per Capita -0.32644726 0.42880232
## Mortality caused by road traffic injury 0.31302487 -0.37186744
## Tuberculosis Incidence 0.23672979 -0.33622933
## DPT Immunization -0.13993470 0.22057595
## HepB3 Immunization -0.09250053 0.13692089
## Measles Immunization -0.14092951 0.24604275
## Hospital beds -0.19473929 0.27407149
## Basic sanitation services -0.30065649 0.55069603
## Tuberculosis treatment 1.00000000 -0.28393086
## Urban population -0.28393086 1.00000000
## Rural population 0.28393086 -1.00000000
## Non-communicable Mortality 0.26680379 -0.53028884
## Sucide Rate -0.07289482 0.08936862
## Rural population
## Life expectancy -0.58297452
## Unemployment -0.08589519
## Infant Mortality 0.47460671
## GDP -0.15210771
## GNI -0.16783616
## Clean fuels and cooking technologies -0.59435666
## Per Capita -0.42880232
## Mortality caused by road traffic injury 0.37186744
## Tuberculosis Incidence 0.33622933
## DPT Immunization -0.22057595
## HepB3 Immunization -0.13692089
## Measles Immunization -0.24604275
## Hospital beds -0.27407149
## Basic sanitation services -0.55069603
## Tuberculosis treatment 0.28393086
## Urban population -1.00000000
## Rural population 1.00000000
## Non-communicable Mortality 0.53028884
## Sucide Rate -0.08936862
## Non-communicable Mortality Sucide Rate
## Life expectancy -0.6338148 0.159357534
## Unemployment 0.1312424 0.006558983
## Infant Mortality 0.6666117 0.059804035
## GDP -0.1917614 0.114090369
## GNI -0.1826627 0.130511162
## Clean fuels and cooking technologies -0.6552676 0.009767390
## Per Capita -0.3553192 0.322819969
## Mortality caused by road traffic injury 0.4071462 -0.110258162
## Tuberculosis Incidence 0.4808992 0.098586543
## DPT Immunization -0.3815920 0.055675815
## HepB3 Immunization -0.3140154 -0.019783046
## Measles Immunization -0.3862628 0.025607272
## Hospital beds -0.3562093 0.266526138
## Basic sanitation services -0.5225441 0.159537407
## Tuberculosis treatment 0.2668038 -0.072894819
## Urban population -0.5302888 0.089368619
## Rural population 0.5302888 -0.089368619
## Non-communicable Mortality 1.0000000 0.184023972
## Sucide Rate 0.1840240 1.000000000
# Correlogram of the correlation matrix with corrplot's default settings.
corrplot(corr = data_cor)

# Second correlogram: colour tiles, lower triangle only, diagonal omitted,
# coefficients written in grey30, labels in grey10, 10-step RdBu palette.
corrplot(
  corr = data_cor,
  method = "color",
  type = "lower",
  diag = FALSE,
  addCoef.col = "grey30",
  tl.col = "grey10",
  cl.pos = "b",
  col = COL2("RdBu", 10)
)
# Standardize every column with scale()'s defaults.
new_data_scaled <- scale(new_data)

# Euclidean distances between the rows of the standardized matrix.
new_data_dist <- dist(new_data_scaled, method = "euclidean")

# Peek at the top-left 7 x 7 corner of the full distance matrix.
as.matrix(new_data_dist)[seq_len(7), seq_len(7)]
## 1 2 3 4 5 6 7
## 1 0.000000 7.605708 6.331840 4.414874 6.645623 7.923487 6.871952
## 2 7.605708 0.000000 2.624659 7.921597 3.357361 3.631018 2.133443
## 3 6.331840 2.624659 0.000000 6.321666 4.350331 3.464837 1.838549
## 4 4.414874 7.921597 6.321666 0.000000 8.095849 7.161240 7.283456
## 5 6.645623 3.357361 4.350331 8.095849 0.000000 4.966244 3.718748
## 6 7.923487 3.631018 3.464837 7.161240 4.966244 0.000000 3.179530
## 7 6.871952 2.133443 1.838549 7.283456 3.718748 3.179530 0.000000
# Hierarchical clustering of the rows from the precomputed distances,
# using the "ward.D2" agglomeration method.
new_data_hc <- hclust(new_data_dist, method = "ward.D2")

# Draw the dendrogram; cex = 0.1 keeps the many leaf labels very small.
fviz_dend(x = new_data_hc, cex = 0.1)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Heatmap of the standardized data with dendrograms.
# Rows are clustered from the precomputed distance object with "ward.D2"
# and the row tree is cut into 5 groups; the column tree is cut into as
# many groups as there are named columns.
pheatmap(
  mat = new_data_scaled,
  clustering_distance_rows = new_data_dist,
  clustering_method = "ward.D2",
  cutree_rows = 5,
  cutree_cols = length(colnames(new_data_scaled)),
  show_rownames = FALSE,
  angle_col = 45,
  main = "Dendrograms for clustering rows and columns with map"
)
# На основе дендрограммы кластеризации было выделено 5 кластеров. Кроме
# того, мы упорядочили колонки так, чтобы похожие колонки были ближе друг к
# другу; красным отмечены более высокие значения, а голубым — более
# низкие. Так, столбцы GDP и GNI в 3-м кластере включают больше
# наблюдений.
# Principal component analysis of the already standardized matrix.
# The argument is spelled `scale.` in full (no partial matching) and uses
# the literal FALSE rather than the reassignable alias `F`. No rescaling is
# requested because new_data_scaled was produced by scale().
new_data_pca <- prcomp(new_data_scaled, scale. = FALSE)

# Loadings: one column per principal component, one row per variable.
new_data_pca$rotation
## PC1 PC2 PC3
## Life expectancy 0.33006689 0.057204617 -0.05177228
## Unemployment -0.04360407 0.118385016 -0.21744626
## Infant Mortality -0.31955172 0.087671945 0.03648213
## GDP 0.09276553 0.219603887 0.62719313
## GNI 0.09354118 0.228616655 0.62558678
## Clean fuels and cooking technologies 0.31191706 0.050540081 -0.12350100
## Per Capita 0.20074534 0.231091189 0.01931646
## Mortality caused by road traffic injury -0.25349810 -0.058468218 0.05350435
## Tuberculosis Incidence -0.23013005 0.040892548 0.08082447
## DPT Immunization 0.24212402 -0.427377944 0.12699330
## HepB3 Immunization 0.20937238 -0.477929780 0.13269426
## Measles Immunization 0.24602541 -0.401989482 0.10628068
## Hospital beds 0.20468586 0.002991494 0.05053434
## Basic sanitation services 0.31411211 0.053310140 -0.08827323
## Tuberculosis treatment -0.14131661 -0.138134329 0.14121743
## Urban population 0.24534367 0.308059080 -0.14538934
## Rural population -0.24534367 -0.308059080 0.14538934
## Non-communicable Mortality -0.26355324 -0.049510722 0.04557013
## Sucide Rate 0.03878119 0.163212116 0.13886389
## PC4 PC5 PC6
## Life expectancy 0.089513675 0.08036646 0.018565760
## Unemployment -0.216386935 -0.61561243 0.494582801
## Infant Mortality 0.078994634 -0.13004272 -0.218691072
## GDP -0.177142715 -0.02121091 0.103010691
## GNI -0.168469681 -0.03399209 0.090623806
## Clean fuels and cooking technologies -0.107962685 -0.01481436 0.210740240
## Per Capita 0.317879743 0.07476950 -0.391358563
## Mortality caused by road traffic injury -0.209266232 -0.26295907 -0.251606180
## Tuberculosis Incidence 0.156467649 -0.27269069 0.006312097
## DPT Immunization 0.026072879 -0.21158797 -0.144538424
## HepB3 Immunization -0.029261698 -0.23706435 -0.096100139
## Measles Immunization 0.007926382 -0.18891535 -0.144925604
## Hospital beds 0.335284926 0.06428811 0.349644874
## Basic sanitation services 0.057088341 -0.02959889 0.230295348
## Tuberculosis treatment -0.118308900 -0.01200118 -0.005160191
## Urban population -0.176581127 -0.26016879 -0.310930274
## Rural population 0.176581127 0.26016879 0.310930274
## Non-communicable Mortality 0.276485055 -0.20390510 0.085450283
## Sucide Rate 0.654299331 -0.35772127 -0.019335506
## PC7 PC8 PC9
## Life expectancy -0.02966855 -1.874076e-01 -0.126804914
## Unemployment 0.19219695 -1.710033e-01 -0.155292293
## Infant Mortality 0.07090199 -2.653059e-02 0.070234452
## GDP 0.08456071 -6.505088e-05 -0.005728914
## GNI 0.07444439 -9.965249e-03 0.005427434
## Clean fuels and cooking technologies -0.04081980 -9.629118e-02 -0.087330164
## Per Capita 0.19432111 -2.534085e-01 -0.450249421
## Mortality caused by road traffic injury 0.01924042 -3.314449e-02 0.278670279
## Tuberculosis Incidence -0.19254401 4.856493e-01 -0.671873972
## DPT Immunization 0.06274235 2.856350e-02 -0.028027646
## HepB3 Immunization 0.06715318 5.903168e-02 -0.029719944
## Measles Immunization 0.02887483 1.889758e-02 -0.014429381
## Hospital beds -0.31296677 4.855598e-01 0.256789546
## Basic sanitation services -0.03992494 -2.450440e-01 -0.046696803
## Tuberculosis treatment -0.81954312 -4.474203e-01 -0.143488181
## Urban population -0.19999539 1.557952e-01 0.128743039
## Rural population 0.19999539 -1.557952e-01 -0.128743039
## Non-communicable Mortality 0.05562817 -1.989353e-01 0.093895930
## Sucide Rate -0.06345358 -1.887176e-01 0.287808040
## PC10 PC11 PC12
## Life expectancy 0.084924190 0.13779658 0.11062481
## Unemployment 0.180440857 -0.33876116 -0.09529601
## Infant Mortality 0.010372306 -0.14839882 0.03623480
## GDP -0.032110598 -0.00936001 0.01721159
## GNI -0.041802877 -0.01269674 0.03313709
## Clean fuels and cooking technologies 0.048522215 0.32612297 0.07991169
## Per Capita 0.292347008 -0.37940437 0.20872964
## Mortality caused by road traffic injury 0.598413965 0.29574822 0.40805889
## Tuberculosis Incidence -0.023773929 0.32870474 0.04038651
## DPT Immunization -0.040977482 -0.11650266 -0.12551303
## HepB3 Immunization -0.066250343 -0.12673472 -0.08944260
## Measles Immunization 0.002942612 0.08431630 0.19760402
## Hospital beds 0.286894543 -0.37184766 0.29722319
## Basic sanitation services -0.061260655 0.33026350 0.31360286
## Tuberculosis treatment 0.057902695 -0.16262514 -0.05263385
## Urban population -0.202824477 -0.06229573 0.02564452
## Rural population 0.202824477 0.06229573 -0.02564452
## Non-communicable Mortality -0.570189360 -0.08715385 0.57042725
## Sucide Rate 0.089301394 0.26883371 -0.42298275
## PC13 PC14 PC15
## Life expectancy -0.486516678 -0.230496048 5.396929e-02
## Unemployment -0.069205415 -0.102131834 -6.399472e-02
## Infant Mortality 0.556918161 -0.365005913 3.976385e-01
## GDP 0.012961176 0.004560391 -1.697393e-02
## GNI -0.003176222 -0.020821126 1.428525e-02
## Clean fuels and cooking technologies 0.597852821 0.425259244 -2.562549e-01
## Per Capita 0.083406683 0.159042542 -6.939584e-02
## Mortality caused by road traffic injury -0.156075552 0.191616473 2.648679e-02
## Tuberculosis Incidence -0.052974407 0.002024872 4.593249e-02
## DPT Immunization -0.031864590 0.216906672 2.181899e-01
## HepB3 Immunization -0.005039022 0.252346213 2.906669e-01
## Measles Immunization 0.177399009 -0.595548459 -5.083456e-01
## Hospital beds 0.064929402 0.006696123 3.960996e-02
## Basic sanitation services 0.081058047 -0.209588283 5.819012e-01
## Tuberculosis treatment 0.033323536 0.004102715 -2.076603e-02
## Urban population -0.031452067 0.009692941 4.490951e-05
## Rural population 0.031452067 -0.009692941 -4.490951e-05
## Non-communicable Mortality -0.100710172 0.222548004 -1.475618e-01
## Sucide Rate -0.009840195 0.028046006 -9.053576e-02
## PC16 PC17
## Life expectancy -0.696266932 0.0010833200
## Unemployment 0.002664070 0.0191440029
## Infant Mortality -0.423704229 0.0385194485
## GDP 0.023394739 -0.0017777024
## GNI -0.048111465 0.0115060301
## Clean fuels and cooking technologies -0.300844342 0.0215577438
## Per Capita 0.170368470 -0.0474112984
## Mortality caused by road traffic injury 0.008712805 0.0119227383
## Tuberculosis Incidence -0.014695088 0.0151111488
## DPT Immunization -0.038820748 0.7365137990
## HepB3 Immunization -0.102859898 -0.6691391704
## Measles Immunization 0.129043569 -0.0314726643
## Hospital beds -0.046348258 0.0129550977
## Basic sanitation services 0.411576630 0.0055180110
## Tuberculosis treatment 0.016428620 -0.0008812946
## Urban population 0.020884322 -0.0104448446
## Rural population -0.020884322 0.0104448446
## Non-communicable Mortality -0.093980507 0.0254033161
## Sucide Rate 0.052305726 -0.0517403022
## PC18 PC19
## Life expectancy -0.0473220348 -2.142886e-16
## Unemployment 0.0007794072 -2.848376e-17
## Infant Mortality -0.0356819468 -6.740917e-17
## GDP -0.7055006478 1.278685e-15
## GNI 0.7057642782 -1.449871e-15
## Clean fuels and cooking technologies 0.0063834316 -7.245161e-16
## Per Capita 0.0100840283 -2.647930e-16
## Mortality caused by road traffic injury -0.0034193690 3.632334e-18
## Tuberculosis Incidence 0.0016041278 -6.911827e-17
## DPT Immunization -0.0090687898 1.068521e-16
## HepB3 Immunization 0.0007918953 8.727128e-17
## Measles Immunization 0.0073766000 -1.092427e-16
## Hospital beds -0.0040983059 -4.687964e-17
## Basic sanitation services 0.0010444312 3.057854e-16
## Tuberculosis treatment -0.0038228266 4.529913e-17
## Urban population -0.0084857085 7.071068e-01
## Rural population 0.0084857085 7.071068e-01
## Non-communicable Mortality -0.0134625522 -8.268647e-17
## Sucide Rate -0.0008333885 6.735392e-17
# PC1, PC2, …, PC19 — это главные компоненты, и каждый столбец отражает
# вклад каждой переменной в данную компоненту. Значения в каждой строке
# представляют собой вес (вклад) соответствующей переменной в
# соответствующую главную компоненту. Положительные и отрицательные
# значения указывают на направление влияния. Переменные с более высокими
# абсолютными значениями вносят больший вклад в главную компоненту.
# PCA biplot with observations grouped (and ellipses drawn) by continent.
# Uses the literal TRUE instead of the reassignable alias `T`.
ggbiplot(
  new_data_pca,
  groups = as.factor(data$continent),
  ellipse = TRUE,
  scale = 0,
  alpha = 0.1
) +
  theme_minimal()

# Standard deviation and (cumulative) proportion of variance per component.
summary(new_data_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.7526 1.4841 1.3952 1.17177 1.08375 0.96347 0.9288
## Proportion of Variance 0.3988 0.1159 0.1025 0.07227 0.06182 0.04886 0.0454
## Cumulative Proportion 0.3988 0.5147 0.6172 0.68945 0.75126 0.80012 0.8455
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.85740 0.69263 0.68937 0.59106 0.54986 0.47085 0.36596
## Proportion of Variance 0.03869 0.02525 0.02501 0.01839 0.01591 0.01167 0.00705
## Cumulative Proportion 0.88421 0.90946 0.93447 0.95286 0.96877 0.98044 0.98749
## PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.34546 0.26941 0.20224 0.06968 8.804e-16
## Proportion of Variance 0.00628 0.00382 0.00215 0.00026 0.000e+00
## Cumulative Proportion 0.99377 0.99759 0.99974 1.00000 1.000e+00
# Variable contributions to each of the first three principal components.
fviz_contrib(
  new_data_pca,
  choice = "var", axes = 1, top = 24
) # PC1
fviz_contrib(
  new_data_pca,
  choice = "var", axes = 2, top = 24
) # PC2
fviz_contrib(
  new_data_pca,
  choice = "var", axes = 3, top = 24
) # PC3

# Variable plot restricted to the top contributing variables, coloured by
# contribution.
fviz_pca_var(
  new_data_pca,
  col.var = "contrib",
  select.var = list(contrib = 3) # set the number of variables here
)
# Первые 3 компоненты объясняют 61,7% вариации данных.
# UMAP embedding of the numeric indicators: normalize every predictor, then
# project with step_umap() (default settings) and extract the embedded data.
umap_data <- recipe(~ ., data = new_data) %>%
  step_normalize(all_predictors()) %>%
  step_umap(all_predictors()) %>%
  prep() %>%
  juice()

# Scatter plot of the two UMAP coordinates, coloured by continent.
# Fix: the y aesthetic was the constant `2`, which put every point on a
# single horizontal line; it must be the second embedding column, UMAP2.
# `data$continent` is taken from the source table because umap_data only
# holds the embedding columns — assumes row order is unchanged.
umap_data %>%
  ggplot(aes(UMAP1, UMAP2)) +
  geom_point(
    aes(color = data$continent),
    alpha = 0.7,
    size = 2
  ) +
  labs(color = NULL)